library(jsonlite)
library(tidyverse)
library(ggplot2)
library(rworldmap)
library(soccermatics)
library(dplyr)
library(fmsb)
library(plotly)
library(lubridate)
library(networkD3)
library(ggrepel)
library(htmltools)
library(htmlwidgets)
library(scales)
matchDetails <- fromJSON("ChampionsLeagueFinalMatch.json", flatten = TRUE) #Import json file and convert it to a dataframe
matchEvents <- fromJSON("ChampionsLeagueFinalEvents.json", flatten = TRUE) #Import json file and convert it to a dataframe
matchLineUp <- fromJSON("ChampionsLeagueFinalLineup.json", flatten = TRUE) #Import json file and convert it to a dataframe
# Converted England and Scotland to UK because the map package groups them
# together under UK
matchLineUp[[3]][[2]]["country.name"][matchLineUp[[3]][[2]]["country.name"] == "England"] = "UK"
matchLineUp[[3]][[2]]["country.name"][matchLineUp[[3]][[2]]["country.name"] == "Scotland"] = "UK"
# Converted England to UK and Korea (South) to south korea
matchLineUp[[3]][[1]]["country.name"][matchLineUp[[3]][[1]]["country.name"] == "England"] = "UK"
matchLineUp[[3]][[1]]["country.name"][matchLineUp[[3]][[1]]["country.id"] == "121"] = "South Korea"
# Clean all location coordinates by splitting the X,Y,Z axis into separate
# fields. Event Location
for (i in 1:nrow(matchEvents)) {
if (is.numeric(matchEvents[[i, 10]])) {
matchEvents$location.x[i] = matchEvents[[i, 10]][1]
matchEvents$location.y[i] = matchEvents[[i, 10]][2]
} else {
matchEvents$location.x[i] = NA
matchEvents$location.y[i] = NA
}
}
# carry end location
for (i in 1:nrow(matchEvents)) {
if (is.numeric(matchEvents[[i, 56]])) {
matchEvents$carry.end_location.x[i] = matchEvents[[i, 56]][1]
matchEvents$carry.end_location.y[i] = matchEvents[[i, 56]][2]
} else {
matchEvents$carry.end_location.x[i] = NA
matchEvents$carry.end_location.y[i] = NA
}
}
# pass end location
for (i in 1:nrow(matchEvents)) {
if (is.numeric(matchEvents[[i, 31]])) {
matchEvents$pass.end_location.x[i] = matchEvents[[i, 31]][1]
matchEvents$pass.end_location.y[i] = matchEvents[[i, 31]][2]
} else {
matchEvents$pass.end_location.x[i] = NA
matchEvents$pass.end_location.y[i] = NA
}
}
# shot end location
for (i in 1:nrow(matchEvents)) {
if (is.numeric(matchEvents[[i, 69]])) {
matchEvents$shot.end_location.x[i] = matchEvents[[i, 69]][1]
matchEvents$shot.end_location.y[i] = matchEvents[[i, 69]][2]
matchEvents$shot.end_location.z[i] = matchEvents[[i, 69]][3]
} else {
matchEvents$shot.end_location.x[i] = NA
matchEvents$shot.end_location.y[i] = NA
matchEvents$shot.end_location.z[i] = NA
}
}
#Show a bubbble map of the player's nationality count
matchLineUp[[3]][[2]] %>% count(country.name) %>%
joinCountryData2Map(joinCode = "NAME",
nameJoinColumn = "country.name") %>%
mapBubbles( nameZSize="n"
, nameZColour="country.name"
,legendTitle ="No of Players"
#,colourPalette='topo', #rainbow
,oceanCol='lightblue', landCol='white'
,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8))
title(main="Liverpool Players Nationality")
par(mai=c(0.3,0.2,0.2,0),xaxs="i",yaxs="i")
#Show a bubbble map of the player's nationality count hignlighting just european countries
matchLineUp[[3]][[2]] %>% count(country.name) %>%
joinCountryData2Map(joinCode = "NAME",
nameJoinColumn = "country.name") %>%
mapBubbles( nameZSize="n"
, nameZColour="country.name"
,mapRegion = "eurasia" #where I specified the map to show just european countries
,legendTitle ="No of Players"
,oceanCol='lightblue', landCol='white'
,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8))
title(main="Liverpool Players Nationality showing the European Countries")
#Show a bubbble map of the player's nationality count
matchLineUp[[3]][[1]] %>% count(country.name) %>%
joinCountryData2Map(joinCode = "NAME",
nameJoinColumn = "country.name") %>%
mapBubbles( nameZSize="n"
, nameZColour="country.name"
,legendTitle ="No of Players"
#colourPalette='topo', #rainbow
,oceanCol='lightblue', landCol='white'
,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8) )
title(main="Tottenham Players Nationality ")
par(mai=c(0.3,0.2,0.2,0),xaxs="i",yaxs="i")
#Show a bubbble map of the player's nationality count hignlighting just european countries
matchLineUp[[3]][[1]] %>% count(country.name) %>%
joinCountryData2Map(joinCode = "NAME",
nameJoinColumn = "country.name") %>%
mapBubbles( nameZSize="n"
, nameZColour="country.name"
,mapRegion = "eurasia" #where I specified the map to show just european countries
,legendTitle ="No of Players"
,oceanCol='lightblue', landCol='white'
,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8))
title(main="Tottenham Players Nationality showing the European Countries")
# show a barplot of all player's nationality
matchLineUp[[3]][[1]] %>% bind_rows(matchLineUp[[3]][[2]]) %>% count(country.name) %>%
barplotCountryData(nameColumnToPlot = "n", nameCountryColumn = "country.name",
numPanels = 1, main = "Players Nationality Count in Final", catMethod = "categorical",
colourPalette = adjustcolor(rwmGetColours("palette", numColours = 5), alpha.f = 0.8))
ggplotly(ggplot(matchEvents, aes(x=type.name,fill=type.name)) +
geom_bar()+
facet_wrap(~team.name) + # shows two output for each team
theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
labs(#subtitle="",
y="Number of events",
x="Event Name",
title="Bar Chart of match events per Team"
)
)
ggplotly(ggplot(matchEvents, aes(x=play_pattern.name,fill=play_pattern.name)) +
geom_bar()+
facet_wrap(~team.name) + # shows two output for each team
theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
labs(#subtitle="",
y="Number of events",
x="Play Pattern Name",
title="Bar Chart of Play pattern per Team"
)
)
ggplotly(
matchEvents %>%
filter(type.name=="Pass") %>%
ggplot(aes(x=pass.height.name,fill=pass.body_part.name)) +
geom_bar()+
facet_wrap(~team.name) +
theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
labs(#subtitle="",
y="Number of events",
x="Pass Type",
title="Bar Chart of pass type by body part per team "
)
)
ggplotly(
matchEvents %>%
filter(type.name=="Shot") %>% # filter for only shots
ggplot(aes(x=type.name,fill=shot.type.name)) +
geom_bar()+
facet_wrap(~team.name) +
theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
labs(#subtitle="",
y="Number of events",
x="Play Pattern Name",
title="Bar Chart of match events per each play pattern for each team"
)
)
ggplotly(
matchEvents %>%
filter(type.name=="Dribble") %>%
ggplot(aes(x=type.name,fill=dribble.outcome.name)) +
geom_bar()+
facet_wrap(~team.name) +
theme(axis.text.x = element_text(angle=90)) +
labs(#subtitle="",
y="Number of Dribles",
x="Dribbles",
title="Bar Chart of Dribbles for each team"
)
)
ggplotly(matchEvents %>%
filter(period==1) %>% # filter for just the first half
mutate(timestamp=parse_date_time(timestamp, orders = "HMS")) %>% #convert the timestamp field to a date_time class
ggplot(aes(x = timestamp, y = possession_team.id)) +
geom_line(color = "grey",
size=0.5 ) +
scale_x_datetime(date_breaks = "5 min",labels = date_format("%H:%M:%S")) + # Use the hour,minute and second as the label for the x axis
labs(title = "Ball Possession between both teams in the 1st Half",
subtitle = "",
x = "Time",
y = "Teams") +
theme_minimal() +
scale_y_continuous(breaks = c(24,38),
labels=c("Liverpool","Tottenham")) +
facet_wrap(~period) +
theme(axis.text.x = element_text(angle=90))
)
ggplotly(matchEvents %>%
filter(period==2) %>% # filter for just the first half
mutate(timestamp=parse_date_time(timestamp, orders = "HMS")) %>% #convert the timestamp field to a date_time class
ggplot(aes(x = timestamp, y = possession_team.id)) +
geom_line(color = "grey",
size=0.5 ) +
scale_x_datetime(date_breaks = "5 min",labels = date_format("%H:%M:%S")) + # Use the hour,minute and second as the label for the x axis
labs(title = "Ball Possession between both teams in the 2nd Half",
subtitle = "",
x = "Time",
y = "Teams") +
theme_minimal() +
scale_y_continuous(breaks = c(24,38),
labels=c("Liverpool","Tottenham")) +
facet_wrap(~period) +
theme(axis.text.x = element_text(angle=90))
)
liverpoolShots<-matchEvents %>%
filter(team.name == "Liverpool",type.name=="Shot")
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
title = "",
subtitle = "") +
geom_point(data = liverpoolShots, aes(x = location.x, y = location.y,fill=shot.outcome.name),size=4,shape = 21) + #where I included the coorinates for the shot
scale_y_reverse() + #reverses the y axis of the pitch as it was inverted
ggtitle("Shotmap",
"Shotmap of Liverpool")
)
liverpoolShot <- matchEvents %>% #Cant show 3d
filter(type.name == "Shot" & team.name == "Liverpool")
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
title = "",
subtitle = "shot map") +
geom_segment(data = liverpoolShot, aes(x = location.x, xend = shot.end_location.x, y = location.y, yend = shot.end_location.y, col = shot.outcome.name)) +
geom_point(data = liverpoolShot, aes(x = location.x, y = location.y, col = shot.outcome.name), size=4) +
scale_y_reverse()
)
tottShots <- matchEvents %>% filter(team.name == "Tottenham Hotspur", type.name ==
"Shot")
ggplotly(soccerPitch(lengthPitch = 120, widthPitch = 80, arrow = "r", theme = "grass",
title = "", subtitle = "") + geom_point(data = tottShots, aes(x = location.x,
y = location.y, fill = shot.outcome.name), size = 4, shape = 21) + scale_y_reverse() +
ggtitle("Shotmap", "Shotmap of Tottenham"))
tottenhamShot <- matchEvents %>% filter(type.name == "Shot" & team.name == "Tottenham Hotspur")
ggplotly(soccerPitch(lengthPitch = 120, widthPitch = 80, arrow = "r", theme = "grass",
title = "", subtitle = "Shot map") + geom_segment(data = tottenhamShot, aes(x = location.x,
xend = shot.end_location.x, y = location.y, yend = shot.end_location.y, col = shot.outcome.name)) +
geom_point(data = tottenhamShot, aes(x = location.x, y = location.y, col = shot.outcome.name),
size = 4) + scale_y_reverse())
matchEvents %>% #This excludes pass outcomes of types "Injury Clearance" and "Unknown"
filter(team.name == "Liverpool",type.name=="Pass") %>%
soccerPassmap(fill = "lightblue", arrow = "r",theme="grass",
title = "",lengthPitch=120,widthPitch=80) +
scale_y_reverse()
matchEvents %>% #This excludes pass outcomes of types "Injury Clearance" and "Unknown"
filter(team.name == "Tottenham Hotspur",type.name=="Pass") %>%
soccerPassmap(fill = "lightblue", arrow = "r",theme="grass",
title = "",lengthPitch=120,widthPitch=80) +
scale_y_reverse()
visualizePassesPerPlayer<- function(player_name){ # function to visualize passes per player
links<- matchEvents %>% # Dataframe to store player name, recipients of passes and number of passes made.
filter(type.name=="Pass",player.name == player_name,is.na(pass.recipient.name)== F) %>%
select(player.name,pass.recipient.name) %>%
count(player.name,pass.recipient.name)
# Dataframe to store all unique players, either a recipient of a pass or the person who made the pass
nodes <- data.frame(
name=c(as.character(links$player.name),
as.character(links$pass.recipient.name)) %>% unique()
)
# The ID to be used to link the passer and the recipient of the pass
links$IDsource <- match(links$player.name, nodes$name)-1 #returns the position of the player name in the dataframe that stores all players
links$IDtarget <- match(links$pass.recipient.name, nodes$name)-1 #returns the position of the recipient name in the dataframe that stores all players
# Create the Network
passNetwork <- sankeyNetwork(Links = links, Nodes = nodes,
Source = "IDsource", Target = "IDtarget",
Value = "n", NodeID = "name",
sinksRight=FALSE,
width= 900, height=600,
fontSize= 12, nodeWidth = 15)
passNetwork <- prependContent(passNetwork, tags$h3("Total Passes Made to each Player")) # Add a title to the Network Diagram
return(passNetwork)
}
visualizePassesPerPlayer("Alisson Ramsés Becker")
# Nmber of shots each player had
ggplotly(matchEvents %>% filter(type.name == "Shot") %>% count(player.name, team.name) %>%
ggplot(aes(n, player.name, color = team.name)) + geom_point(size = 5) + labs(x = "Number of Shots",
title = "No of Shots Per Player", subtitle = "") + theme_minimal())
# Function to show Touch map of each player
playerTouchMap <- function(playerName) {
ggplotly(matchEvents %>% filter(player.name == playerName) %>% select(location.x,
location.y) %>% rename(x = location.x, y = location.y) %>% soccerPath(col = "red",
lengthPitch = 120, widthPitch = 80, arrow = "r", lwd = 0.5) + scale_y_reverse() +
labs(title = "Player's Touch Map", subtitle = ""))
}
playerTouchMap("Mohamed Salah")
# Average player position for Tottenham
tottAvgPos <- matchEvents %>% filter(team.name == "Tottenham Hotspur", is.na(player.name) ==
F) %>% group_by(player.name) %>% summarise(x = mean(location.x, na.rm = T), y = mean(location.y,
na.rm = T)) #Compute the average(mean) X and Y coordinates for the players
soccerPitch(lengthPitch = 120, widthPitch = 80, arrow = "r", theme = "grass", title = "",
subtitle = "") + geom_point(data = tottAvgPos, aes(x = x, y = y), size = 4, shape = 21,
fill = "blue") + scale_y_reverse() + ggtitle("Average Positions", "Average Position Per Player") +
geom_label_repel(data = tottAvgPos, aes(x, y, label = str_split(player.name,
" ", simplify = T)[, 2]), fontface = "bold", size = 4) #Add a label for the player names and avoid the names from overlapping across each other.
# Average player position for Liverpool
livAvgPos <- matchEvents %>% filter(team.name == "Liverpool", is.na(player.name) ==
F) %>% group_by(player.name) %>% summarise(x = mean(location.x, na.rm = T), y = mean(location.y,
na.rm = T)) #Compute the average(mean) X and Y coordinates for the players
soccerPitch(lengthPitch = 120, widthPitch = 80, arrow = "r", title = "", subtitle = "") +
geom_point(data = livAvgPos, aes(x = x, y = y), size = 4, shape = 21, fill = "red") +
scale_y_reverse() + ggtitle("Average Positions", "Average Position Per Player") +
geom_label_repel(data = livAvgPos, aes(x, y, label = str_split(player.name, " ",
simplify = T)[, 2]), fontface = "bold", size = 4) #Add a label for the player names and avoid the names from overlapping across each other.
#Comparison of Salah and Kane
salVsKane<-matchEvents %>%
count(player.name,type.name) %>%
filter(player.name %in% c("Harry Kane","Mohamed Salah"),type.name %in% c("Ball Receipt*","Ball Recovery","Carry","Dispossessed","Duel","Pass","Pressure","Shot")) %>% # List of qualities to compare both players on
arrange(player.name,type.name)
data <- as.data.frame(matrix( salVsKane$n , ncol=8,byrow = T)) #turn the data into a matrix with each quality being a column
colnames(data) <- c("Ball Receipt","Ball Recovery","Carry","Dispossessed","Duel","Pass","Pressure","Shot")
rownames(data) <- c("Kane","Salah")
# Include two new rows to the dataframe to specify the maximum and minimum value for the qualities being assessed. maximum of 60 and minimum of 0. Replicated 8 times across each column.
data <- rbind(rep(60,8) , rep(0,8) , data)
# Specify Colors
colors_border=c( rgb(0.2,0.5,0.5,0.9), rgb(0.8,0.2,0.5,0.9) , rgb(0.7,0.5,0.1,0.9) )
colors_in=c( rgb(0.2,0.5,0.5,0.4), rgb(0.8,0.2,0.5,0.4) , rgb(0.7,0.5,0.1,0.4) )
# plot the radar chart
radarchart(data, axistype=1 ,
pcol=colors_border , pfcol=colors_in , plwd=4 , plty=1,
cglcol="grey", cglty=1, axislabcol="grey", caxislabels=seq(0,60,15), cglwd=0.8,
vlcex=0.8 )
# Add a legend to the chart
legend(x=1, y=1, legend = rownames(data[-c(1,2),]), bty = "n", pch=20 , col=colors_in , text.col = "grey", cex=1.2, pt.cex=3)
#Visualise Substitutions made
# Function to visualize all the substitutions made in the game and the time it was made
visualizeSubsMade<- function(){
sub_links<- matchEvents %>%
filter(type.name=="Substitution",minute<=100) %>% #Include all substitutions on or before the 100th minute
select(player.name,substitution.replacement.name,minute)
if (nrow(sub_links) > 0) { #Condition to return "No Substitution" if none were made before the time
#All unique players involved in the substitutions made
sub_nodes <- data.frame(
name=c(as.character(sub_links$player.name),
as.character(sub_links$substitution.replacement.name)) %>% unique()
)
#ID to link the substituted player with the incoming player
sub_links$IDsource <- match(sub_links$player.name, sub_nodes$name)-1
sub_links$IDtarget <- match(sub_links$substitution.replacement.name, sub_nodes$name)-1
# Assign colors to show player's being substituted and their replacements
color <- 'd3.scaleOrdinal() .domain(["Roberto Firmino Barbosa de Oliveira", "Georginio Wijnaldum","Harry Winks",
"Moussa Sissoko", "Bamidele Alli", "Sadio Mané", "Divock Okoth Origi", "James Philip Milner","Lucas Rodrigues Moura da Silva",
"Eric Dier","Fernando Llorente Torres","Joe Gomez"]) .range(["red", "red" , "red", "red", "red", "red", "green", "green", "green", "green"
, "green", "green"])'
# Create the network
subsMadeNetwork <- sankeyNetwork(Links = sub_links, Nodes = sub_nodes,
Source = "IDsource", Target = "IDtarget",
Value = "minute", NodeID = "name",
sinksRight=F,
width= 900, height=600,
fontSize= 12, nodeWidth = 15,
colourScale=color)
subsMadeNetwork <- prependContent(subsMadeNetwork, tags$h1("Substitutions made"))
return(subsMadeNetwork)}
else{
print("No Substitution")
}
}
visualizeSubsMade()
knitr::opts_chunk$set(echo = TRUE,message=FALSE,warning=FALSE,error=FALSE,tidy=TRUE)
library(jsonlite)
library(tidyverse)
library(ggplot2)
library(rworldmap)
library(soccermatics)
library(dplyr)
library(fmsb)
library(plotly)
library(lubridate)
library(networkD3)
library(ggrepel)
library(htmltools)
library(htmlwidgets)
library(scales)
matchDetails <- fromJSON("ChampionsLeagueFinalMatch.json", flatten=TRUE) #Import json file and convert it to a dataframe
matchEvents <- fromJSON("ChampionsLeagueFinalEvents.json", flatten=TRUE) #Import json file and convert it to a dataframe
matchLineUp <- fromJSON("ChampionsLeagueFinalLineup.json", flatten=TRUE) #Import json file and convert it to a dataframe
#Converted England and Scotland to UK because the map package groups them together under UK
matchLineUp[[3]][[2]]["country.name"][matchLineUp[[3]][[2]]["country.name"]=="England"]="UK"
matchLineUp[[3]][[2]]["country.name"][matchLineUp[[3]][[2]]["country.name"]=="Scotland"]="UK"
#Converted England to UK and Korea (South) to south korea
matchLineUp[[3]][[1]]["country.name"][matchLineUp[[3]][[1]]["country.name"]=="England"]="UK"
matchLineUp[[3]][[1]]["country.name"][matchLineUp[[3]][[1]]["country.id"]=="121"]="South Korea"
#Clean all location coordinates by splitting the X,Y,Z axis into separate fields.
#Event Location
for (i in 1:nrow(matchEvents)){
if (is.numeric(matchEvents[[i,10]])){
matchEvents$location.x[i]=matchEvents[[i,10]][1]
matchEvents$location.y[i]=matchEvents[[i,10]][2]
}
else{
matchEvents$location.x[i]=NA
matchEvents$location.y[i]=NA
}
}
#carry end location
for (i in 1:nrow(matchEvents)){
if (is.numeric(matchEvents[[i,56]])){
matchEvents$carry.end_location.x[i]=matchEvents[[i,56]][1]
matchEvents$carry.end_location.y[i]=matchEvents[[i,56]][2]
}
else{
matchEvents$carry.end_location.x[i]=NA
matchEvents$carry.end_location.y[i]=NA
}
}
#pass end location
for (i in 1:nrow(matchEvents)){
if (is.numeric(matchEvents[[i,31]])){
matchEvents$pass.end_location.x[i]=matchEvents[[i,31]][1]
matchEvents$pass.end_location.y[i]=matchEvents[[i,31]][2]
}
else{
matchEvents$pass.end_location.x[i]=NA
matchEvents$pass.end_location.y[i]=NA
}
}
#shot end location
for (i in 1:nrow(matchEvents)){
if (is.numeric(matchEvents[[i,69]])){
matchEvents$shot.end_location.x[i]=matchEvents[[i,69]][1]
matchEvents$shot.end_location.y[i]=matchEvents[[i,69]][2]
matchEvents$shot.end_location.z[i]=matchEvents[[i,69]][3]
}
else{
matchEvents$shot.end_location.x[i]=NA
matchEvents$shot.end_location.y[i]=NA
matchEvents$shot.end_location.z[i]=NA
}
}
#Show a bubbble map of the player's nationality count
matchLineUp[[3]][[2]] %>% count(country.name) %>%
joinCountryData2Map(joinCode = "NAME",
nameJoinColumn = "country.name") %>%
mapBubbles( nameZSize="n"
, nameZColour="country.name"
,legendTitle ="No of Players"
#,colourPalette='topo', #rainbow
,oceanCol='lightblue', landCol='white'
,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8))
title(main="Liverpool Players Nationality")
par(mai=c(0.3,0.2,0.2,0),xaxs="i",yaxs="i")
#Show a bubbble map of the player's nationality count hignlighting just european countries
matchLineUp[[3]][[2]] %>% count(country.name) %>%
joinCountryData2Map(joinCode = "NAME",
nameJoinColumn = "country.name") %>%
mapBubbles( nameZSize="n"
, nameZColour="country.name"
,mapRegion = "eurasia" #where I specified the map to show just european countries
,legendTitle ="No of Players"
,oceanCol='lightblue', landCol='white'
,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8))
title(main="Liverpool Players Nationality showing the European Countries")
#Show a bubbble map of the player's nationality count
matchLineUp[[3]][[1]] %>% count(country.name) %>%
joinCountryData2Map(joinCode = "NAME",
nameJoinColumn = "country.name") %>%
mapBubbles( nameZSize="n"
, nameZColour="country.name"
,legendTitle ="No of Players"
#colourPalette='topo', #rainbow
,oceanCol='lightblue', landCol='white'
,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8) )
title(main="Tottenham Players Nationality ")
par(mai=c(0.3,0.2,0.2,0),xaxs="i",yaxs="i")
#Show a bubbble map of the player's nationality count hignlighting just european countries
matchLineUp[[3]][[1]] %>% count(country.name) %>%
joinCountryData2Map(joinCode = "NAME",
nameJoinColumn = "country.name") %>%
mapBubbles( nameZSize="n"
, nameZColour="country.name"
,mapRegion = "eurasia" #where I specified the map to show just european countries
,legendTitle ="No of Players"
,oceanCol='lightblue', landCol='white'
,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8))
title(main="Tottenham Players Nationality showing the European Countries")
# show a barplot of all player's nationality
matchLineUp[[3]][[1]] %>%
bind_rows(matchLineUp[[3]][[2]]) %>%
count(country.name) %>%
barplotCountryData(nameColumnToPlot="n"
, nameCountryColumn = "country.name"
,numPanels = 1
,main = "Players Nationality Count in Final"
,catMethod = "categorical"
,colourPalette=adjustcolor(rwmGetColours("palette", numColours=5), alpha.f = 0.8)
)
ggplotly(ggplot(matchEvents, aes(x=type.name,fill=type.name)) +
geom_bar()+
facet_wrap(~team.name) + # shows two output for each team
theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
labs(#subtitle="",
y="Number of events",
x="Event Name",
title="Bar Chart of match events per Team"
)
)
ggplotly(ggplot(matchEvents, aes(x=play_pattern.name,fill=play_pattern.name)) +
geom_bar()+
facet_wrap(~team.name) + # shows two output for each team
theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
labs(#subtitle="",
y="Number of events",
x="Play Pattern Name",
title="Bar Chart of Play pattern per Team"
)
)
ggplotly(
matchEvents %>%
filter(type.name=="Pass") %>%
ggplot(aes(x=pass.height.name,fill=pass.body_part.name)) +
geom_bar()+
facet_wrap(~team.name) +
theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
labs(#subtitle="",
y="Number of events",
x="Pass Type",
title="Bar Chart of pass type by body part per team "
)
)
ggplotly(
matchEvents %>%
filter(type.name=="Shot") %>% # filter for only shots
ggplot(aes(x=type.name,fill=shot.type.name)) +
geom_bar()+
facet_wrap(~team.name) +
theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
labs(#subtitle="",
y="Number of events",
x="Play Pattern Name",
title="Bar Chart of match events per each play pattern for each team"
)
)
ggplotly(
matchEvents %>%
filter(type.name=="Dribble") %>%
ggplot(aes(x=type.name,fill=dribble.outcome.name)) +
geom_bar()+
facet_wrap(~team.name) +
theme(axis.text.x = element_text(angle=90)) +
labs(#subtitle="",
y="Number of Dribles",
x="Dribbles",
title="Bar Chart of Dribbles for each team"
)
)
ggplotly(matchEvents %>%
filter(period==1) %>% # filter for just the first half
mutate(timestamp=parse_date_time(timestamp, orders = "HMS")) %>% #convert the timestamp field to a date_time class
ggplot(aes(x = timestamp, y = possession_team.id)) +
geom_line(color = "grey",
size=0.5 ) +
scale_x_datetime(date_breaks = "5 min",labels = date_format("%H:%M:%S")) + # Use the hour,minute and second as the label for the x axis
labs(title = "Ball Possession between both teams in the 1st Half",
subtitle = "",
x = "Time",
y = "Teams") +
theme_minimal() +
scale_y_continuous(breaks = c(24,38),
labels=c("Liverpool","Tottenham")) +
facet_wrap(~period) +
theme(axis.text.x = element_text(angle=90))
)
ggplotly(matchEvents %>%
filter(period==2) %>% # filter for just the first half
mutate(timestamp=parse_date_time(timestamp, orders = "HMS")) %>% #convert the timestamp field to a date_time class
ggplot(aes(x = timestamp, y = possession_team.id)) +
geom_line(color = "grey",
size=0.5 ) +
scale_x_datetime(date_breaks = "5 min",labels = date_format("%H:%M:%S")) + # Use the hour,minute and second as the label for the x axis
labs(title = "Ball Possession between both teams in the 2nd Half",
subtitle = "",
x = "Time",
y = "Teams") +
theme_minimal() +
scale_y_continuous(breaks = c(24,38),
labels=c("Liverpool","Tottenham")) +
facet_wrap(~period) +
theme(axis.text.x = element_text(angle=90))
)
liverpoolShots<-matchEvents %>%
filter(team.name == "Liverpool",type.name=="Shot")
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
title = "",
subtitle = "") +
geom_point(data = liverpoolShots, aes(x = location.x, y = location.y,fill=shot.outcome.name),size=4,shape = 21) + #where I included the coorinates for the shot
scale_y_reverse() + #reverses the y axis of the pitch as it was inverted
ggtitle("Shotmap",
"Shotmap of Liverpool")
)
liverpoolShot <- matchEvents %>% #Cant show 3d
filter(type.name == "Shot" & team.name == "Liverpool")
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
title = "",
subtitle = "shot map") +
geom_segment(data = liverpoolShot, aes(x = location.x, xend = shot.end_location.x, y = location.y, yend = shot.end_location.y, col = shot.outcome.name)) +
geom_point(data = liverpoolShot, aes(x = location.x, y = location.y, col = shot.outcome.name), size=4) +
scale_y_reverse()
)
tottShots<-matchEvents %>%
filter(team.name == "Tottenham Hotspur",type.name=="Shot")
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
title = "",
subtitle = "") +
geom_point(data = tottShots, aes(x = location.x, y = location.y,fill=shot.outcome.name),size=4,shape = 21) +
scale_y_reverse() +
ggtitle("Shotmap",
"Shotmap of Tottenham")
)
tottenhamShot <- matchEvents %>%
filter(type.name == "Shot" & team.name == "Tottenham Hotspur")
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
title = "",
subtitle = "Shot map") +
geom_segment(data = tottenhamShot, aes(x = location.x, xend = shot.end_location.x, y = location.y, yend = shot.end_location.y, col = shot.outcome.name)) +
geom_point(data = tottenhamShot, aes(x = location.x, y = location.y, col = shot.outcome.name), size=4) +
scale_y_reverse()
)
matchEvents %>% #This excludes pass outcomes of types "Injury Clearance" and "Unknown"
filter(team.name == "Liverpool",type.name=="Pass") %>%
soccerPassmap(fill = "lightblue", arrow = "r",theme="grass",
title = "",lengthPitch=120,widthPitch=80) +
scale_y_reverse()
matchEvents %>% #This excludes pass outcomes of types "Injury Clearance" and "Unknown"
filter(team.name == "Tottenham Hotspur",type.name=="Pass") %>%
soccerPassmap(fill = "lightblue", arrow = "r",theme="grass",
title = "",lengthPitch=120,widthPitch=80) +
scale_y_reverse()
visualizePassesPerPlayer<- function(player_name){ # function to visualize passes per player
links<- matchEvents %>% # Dataframe to store player name, recipients of passes and number of passes made.
filter(type.name=="Pass",player.name == player_name,is.na(pass.recipient.name)== F) %>%
select(player.name,pass.recipient.name) %>%
count(player.name,pass.recipient.name)
# Dataframe to store all unique players, either a recipient of a pass or the person who made the pass
nodes <- data.frame(
name=c(as.character(links$player.name),
as.character(links$pass.recipient.name)) %>% unique()
)
# The ID to be used to link the passer and the recipient of the pass
links$IDsource <- match(links$player.name, nodes$name)-1 #returns the position of the player name in the dataframe that stores all players
links$IDtarget <- match(links$pass.recipient.name, nodes$name)-1 #returns the position of the recipient name in the dataframe that stores all players
# Create the Network
passNetwork <- sankeyNetwork(Links = links, Nodes = nodes,
Source = "IDsource", Target = "IDtarget",
Value = "n", NodeID = "name",
sinksRight=FALSE,
width= 900, height=600,
fontSize= 12, nodeWidth = 15)
passNetwork <- prependContent(passNetwork, tags$h3("Total Passes Made to each Player")) # Add a title to the Network Diagram
return(passNetwork)
}
visualizePassesPerPlayer("Alisson Ramsés Becker")
# Nmber of shots each player had
ggplotly(
matchEvents %>%
filter(type.name=="Shot") %>%
count(player.name,team.name) %>%
ggplot(aes(n, player.name,color=team.name)) +
geom_point(size = 5) +
labs(x = "Number of Shots",
title = "No of Shots Per Player",
subtitle = "") +
theme_minimal()
)
#Function to show Touch map of each player
playerTouchMap<- function(playerName){ggplotly(
matchEvents %>%
filter(player.name == playerName) %>%
select(location.x,location.y) %>%
rename(x=location.x,y=location.y) %>%
soccerPath(col="red",lengthPitch = 120,widthPitch = 80,arrow = "r",lwd=0.5) +
scale_y_reverse() +
labs(title = "Player's Touch Map",
subtitle = "")
)
}
playerTouchMap("Mohamed Salah")
#Average player position for Tottenham
tottAvgPos<-matchEvents %>%
filter(team.name == "Tottenham Hotspur",is.na(player.name)==F) %>%
group_by(player.name) %>%
summarise(x=mean(location.x,na.rm =T),y=mean(location.y,na.rm =T)) #Compute the average(mean) X and Y coordinates for the players
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
title = "",
subtitle = "") +
geom_point(data = tottAvgPos, aes(x = x, y = y),size=4,shape = 21,fill="blue") +
scale_y_reverse() +
ggtitle("Average Positions",
"Average Position Per Player") +
geom_label_repel(data=tottAvgPos,aes(x,y,label=str_split(player.name," ",simplify =T)[,2]),fontface = "bold",size=4) #Add a label for the player names and avoid the names from overlapping across each other.
#Average player position for Liverpool
livAvgPos<-matchEvents %>%
filter(team.name == "Liverpool",is.na(player.name)==F) %>%
group_by(player.name) %>%
summarise(x=mean(location.x,na.rm =T),y=mean(location.y,na.rm =T)) #Compute the average(mean) X and Y coordinates for the players
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",
title = "",
subtitle = "") +
geom_point(data = livAvgPos, aes(x = x, y = y),size=4,shape = 21,fill="red") +
scale_y_reverse() +
ggtitle("Average Positions",
"Average Position Per Player") +
geom_label_repel(data=livAvgPos,aes(x,y,label=str_split(player.name," ",simplify =T)[,2]),fontface = "bold",size=4) #Add a label for the player names and avoid the names from overlapping across each other.
#Comparison of Salah and Kane
salVsKane<-matchEvents %>%
count(player.name,type.name) %>%
filter(player.name %in% c("Harry Kane","Mohamed Salah"),type.name %in% c("Ball Receipt*","Ball Recovery","Carry","Dispossessed","Duel","Pass","Pressure","Shot")) %>% # List of qualities to compare both players on
arrange(player.name,type.name)
data <- as.data.frame(matrix( salVsKane$n , ncol=8,byrow = T)) #turn the data into a matrix with each quality being a column
colnames(data) <- c("Ball Receipt","Ball Recovery","Carry","Dispossessed","Duel","Pass","Pressure","Shot")
rownames(data) <- c("Kane","Salah")
# Include two new rows to the dataframe to specify the maximum and minimum value for the qualities being assessed. maximum of 60 and minimum of 0. Replicated 8 times across each column.
data <- rbind(rep(60,8) , rep(0,8) , data)
# Specify Colors
colors_border=c( rgb(0.2,0.5,0.5,0.9), rgb(0.8,0.2,0.5,0.9) , rgb(0.7,0.5,0.1,0.9) )
colors_in=c( rgb(0.2,0.5,0.5,0.4), rgb(0.8,0.2,0.5,0.4) , rgb(0.7,0.5,0.1,0.4) )
# plot the radar chart
radarchart(data, axistype=1 ,
pcol=colors_border , pfcol=colors_in , plwd=4 , plty=1,
cglcol="grey", cglty=1, axislabcol="grey", caxislabels=seq(0,60,15), cglwd=0.8,
vlcex=0.8 )
# Add a legend to the chart
legend(x=1, y=1, legend = rownames(data[-c(1,2),]), bty = "n", pch=20 , col=colors_in , text.col = "grey", cex=1.2, pt.cex=3)
#Visualise Substitutions made
# Function to visualize all the substitutions made in the game and the time it was made
visualizeSubsMade<- function(){
sub_links<- matchEvents %>%
filter(type.name=="Substitution",minute<=100) %>% #Include all substitutions on or before the 100th minute
select(player.name,substitution.replacement.name,minute)
if (nrow(sub_links) > 0) { #Condition to return "No Substitution" if none were made before the time
#All unique players involved in the substitutions made
sub_nodes <- data.frame(
name=c(as.character(sub_links$player.name),
as.character(sub_links$substitution.replacement.name)) %>% unique()
)
#ID to link the substituted player with the incoming player
sub_links$IDsource <- match(sub_links$player.name, sub_nodes$name)-1
sub_links$IDtarget <- match(sub_links$substitution.replacement.name, sub_nodes$name)-1
# Assign colors to show player's being substituted and their replacements
color <- 'd3.scaleOrdinal() .domain(["Roberto Firmino Barbosa de Oliveira", "Georginio Wijnaldum","Harry Winks",
"Moussa Sissoko", "Bamidele Alli", "Sadio Mané", "Divock Okoth Origi", "James Philip Milner","Lucas Rodrigues Moura da Silva",
"Eric Dier","Fernando Llorente Torres","Joe Gomez"]) .range(["red", "red" , "red", "red", "red", "red", "green", "green", "green", "green"
, "green", "green"])'
# Create the network
subsMadeNetwork <- sankeyNetwork(Links = sub_links, Nodes = sub_nodes,
Source = "IDsource", Target = "IDtarget",
Value = "minute", NodeID = "name",
sinksRight=F,
width= 900, height=600,
fontSize= 12, nodeWidth = 15,
colourScale=color)
subsMadeNetwork <- prependContent(subsMadeNetwork, tags$h1("Substitutions made"))
return(subsMadeNetwork)}
else{
print("No Substitution")
}
}
visualizeSubsMade()
“F1 Data Analysis.” n.d. Accessed March 5, 2021. https://kaggle.com/jonathanbouchet/f1-data-analysis.
Gallagher, Joe. 2021. “JoGall/Soccermatics.” https://github.com/JoGall/soccermatics.
Holtz, Yan. n.d.a. “Basic Radar Chart.” Accessed March 5, 2021. https://www.r-graph-gallery.com/142-basic-radar-chart.html.
———. n.d.b. “Most Basic Sankey Diagram.” Accessed March 5, 2021. https://www.r-graph-gallery.com/321-introduction-to-interactive-sankey-diagram-2.html.
Kabacoff, Rob. n.d. Data Visualization with R. Accessed March 5, 2021. https://rkabacoff.github.io/datavis/.
“Rworldmap Package R Documentation.” n.d. Accessed March 5, 2021. https://www.rdocumentation.org/packages/rworldmap/versions/1.3-6.
“Statsbomb/Open-Data.” 2021. StatsBomb. https://github.com/statsbomb/open-data.